# Fetch the page's HTML source and save it to a local file
import requests
url = 'http://pc.weathercn.com/air/rank/?order=0'
# Send a browser-style User-Agent header with the request
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'}
# requests has no default timeout; set one so the script cannot hang forever
# when the server never answers
response = requests.get(url=url, headers=headers, timeout=30)
# Fail loudly on a 4xx/5xx reply instead of saving and parsing an error page
response.raise_for_status()
# Decode the body using the encoding detected from its content
response.encoding = response.apparent_encoding
html_code = response.text
file_path = 'AQI.txt'
# Persist the decoded HTML as UTF-8 so later steps can parse it from disk
with open(file=file_path, mode='w', encoding='utf-8') as f:
    f.write(html_code)
    
# Extract every data table found in the saved page
import pandas as pd
# The file on disk was written as UTF-8, so state that explicitly; with the
# default (encoding=None) the parser may follow the charset declared inside
# the document, which can differ from how the file was actually saved.
table_list = pd.read_html(io=file_path, encoding='utf-8')
# Alternatively, parse the in-memory HTML string instead of the file:
# from io import StringIO
# table_list = pd.read_html(io=StringIO(html_code))
# Report how many tables were found
print(len(table_list))

# Extract only the tables whose text matches the given keyword.
# The file on disk was written as UTF-8, so state that explicitly; a
# mis-decoded document would also keep the non-ASCII `match` pattern from
# matching.
table_list = pd.read_html(io=file_path, match='空气质量状况', encoding='utf-8')
print(len(table_list))
# read_html raises ValueError when nothing matches, so index 0 exists here
data = table_list[0]
print(data)

# Extract only the tables whose HTML attributes match the given values.
# The file on disk was written as UTF-8, so state that explicitly rather than
# relying on the parser's own encoding detection.
table_list = pd.read_html(io=file_path, attrs={'class': 'air-trend-list'}, encoding='utf-8')
# read_html raises ValueError when nothing matches, so index 0 exists here
data = table_list[0]
print(data)
